* ========================================================================= *
*   TEXAS INSTRUMENTS, INC.                                                 *
*                                                                           *
*   NAME                                                                    *
*       pix_expand                                                          *
*                                                                           *
*   USAGE                                                                   *
*       This routine is C-callable and can be called as:                    *
*                                                                           *
*       void pix_expand_asm                                                 *
*       (                                                                   *
*           int n,                                    /* # of elements */   *
*           const unsigned char *restrict in_data,    /* Input data    */   *
*           short               *restrict out_data    /* Output data   */   *
*       )                                                                   *
*                                                                           *
*                                                                           *
*   DESCRIPTION                                                             *
*       The code takes an array of bytes and promotes them to half-words    *
*       by zero-extension.                                                  *
*                                                                           *
*       This is the C equivalent of the assembly code, without              *
*       restrictions.  The assembly code has restrictions, as noted below.  *
*                                                                           *
*       void pix_expand                                                     *
*       (                                                                   *
*           int n,                                                          *
*           const unsigned char *restrict in_data,                          *
*           short               *restrict out_data                          *
*       )                                                                   *
*       {                                                                   *
*           int i;                                                          *
*                                                                           *
*           for (i = 0; i < n; i++)                                         *
*               out_data[i] =  in_data[i];                                  *
*       }                                                                   *
*                                                                           *
*   ASSUMPTIONS                                                             *
*       Input and output arrays must be double-word (8-byte) aligned.       *
*                                                                           *
*       The input must be at least 16 elements long and contain a           *
*       multiple of 16 elements.                                            *
*                                                                           *
*   NOTE                                                                    *
*       Interrupts are masked during the entire duration of this            *
*       function, as the entire function occurs within branch delay slots.  *
*                                                                           *
*   MEMORY NOTE                                                             *
*       No bank conflicts occur.  This is a LITTLE ENDIAN implementation.   *
*                                                                           *
*   TECHNIQUES                                                              *
*       The loop is unrolled 16 times, loading bytes with LDDW.  It uses    *
*       UNPKHU4 and UNPKLU4 to unpack the data and store the results with   *
*       STDW.                                                               *
*                                                                           *
*       To shave a few extra cycles from the function, the return branch    *
*       is issued from within the kernel.                                   *
*                                                                           *
*   CYCLES                                                                  *
*       cycles = 3 * (n / 16) + 15.                                         *
*       For n = 1072, cycles = 216.                                         *
*                                                                           *
*   CODESIZE                                                                *
*       104 bytes.                                                          *
*                                                                           *
* ------------------------------------------------------------------------- *
*             Copyright (c) 2000 Texas Instruments, Incorporated.           *
*                            All Rights Reserved.                           *
* ========================================================================= *
        .include "pix_expand_h.h62"
_pix_expand_asm:
*   Entry point.  Under the C6000 C calling convention the arguments        *
*   arrive as A_n (A4) = n, B_i0 (B4) = in_data, A_o0 (A6) = out_data,      *
*   and B_ret (B3) holds the return address.                                *
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
*   Some names below alias one physical register (A8, A9, B6, B7): the      *
*   packed input word is read by UNPKxU4 no later than the packet that      *
*   overwrites the register with an unpacked halfword pair.                 *
        .asg            A0,          A_i
        .asg            B1,          B_i 
        .asg            A2,          A_p_10
        .asg            A3,          A_p_32
        .asg            A4,          A_n
        .asg            A6,          A_o0
        .asg            A7,          A_i1
        .asg            A8,          A_p_3210
        .asg            A8,          A_p_98
        .asg            A9,          A_p_7654
        .asg            A9,          A_p_BA
        .asg            B0,          B_p
        .asg            B3,          B_ret
        .asg            B4,          B_i0
        .asg            B5,          B_o1
        .asg            B6,          B_p_BA98
        .asg            B6,          B_p_DC
        .asg            B7,          B_p_FE
        .asg            B7,          B_p_FEDC
        .asg            B8,          B_p_54
        .asg            B9,          B_p_76
* ========================================================================= *
* =========================== PIPE LOOP PROLOG ============================ *
*   The branch to the kernel is issued first; the two set-up packets        *
*   and the fall-through pass over the kernel run in its delay slots.       *
        B               loop                                    ;[ 4,1]

*   Split the work across both data paths: A_i1 = in_data + 8 and           *
*   B_o1 = out_data + 8 are second pointers one doubleword ahead.           *
*   A_i = n >> 4 is the number of 16-pixel blocks (loop counter).           *
        ADD             B_i0,       8,          A_i1
||      ADD             A_o0,       8,          B_o1
||      SHR             A_n,        4,          A_i

*   Fetch the first 16 input bytes, 8 per side, and post-increment          *
*   each input pointer by 2 doublewords (16 bytes).  B_p starts             *
*   non-zero; while it is non-zero the kernel stores are squashed.          *
        LDDW    .D1T2   *A_i1++[2], B_p_FEDC:B_p_BA98           ;[ 1,1]
||      LDDW    .D2T1   *B_i0++[2], A_p_7654:A_p_3210           ;[ 1,1]
||      MVK             0xFFFF8000, B_p
* =========================== PIPE LOOP KERNEL ============================ *
*   Packet 1 of 3: store pixels 8..F of the block unpacked on the           *
*   previous pass (byte offsets 16 and 24 from A_o0), and do the loop       *
*   control:                                                                *
*     - BPOS branches back while A_i is positive;                           *
*     - the return branch is issued on the pass where A_i is zero;          *
*     - once A_i is negative neither branch is taken.                       *
*   B_i = (A_i before the decrement) + 1 predicates the loads; it is        *
*   zero only when A_i was -1.  MPY doubles the low halfword of B_p:        *
*   0xFFFF8000 -> 0xFFFF0000 -> 0, enabling stores from pass 3 on.          *
loop:
  [!B_p]STDW    .D1T1   A_p_BA:A_p_98,          *A_o0[2]        ;[ 8,1]
||[!B_p]STDW    .D2T2   B_p_FE:B_p_DC,          *B_o1[2]        ;[ 8,1]
||[ A_i]BPOS    .S1     loop,       A_i                         ;[ 5,2]
||[!A_i]B       .S2     B_ret
||      SUB     .L1     A_i,        1,          A_i
||      ADD     .L2X    1,          A_i,        B_i             ;final load
||[ B_p]MPY     .M2     B_p,        2,          B_p

*   Packet 2 of 3: store pixels 0..7 of the previous block and advance      *
*   both output pointers by 4 doublewords (32 bytes = 16 halfwords).        *
*   In parallel, unpack input bytes 0..7 of the next block into             *
*   halfwords (UNPKLU4 = low two bytes, UNPKHU4 = high two bytes).          *
  [!B_p]STDW    .D1T1   A_p_32:A_p_10,          *A_o0++[4]      ;[ 9,1]
||[!B_p]STDW    .D2T2   B_p_76:B_p_54,          *B_o1++[4]      ;[ 9,1]
||      UNPKLU4 .S1     A_p_3210,   A_p_10                      ;[ 6,2]
||      UNPKHU4 .L1     A_p_3210,   A_p_32                      ;[ 6,2]
||      UNPKLU4 .L2X    A_p_7654,   B_p_54                      ;[ 6,2]
||      UNPKHU4 .S2X    A_p_7654,   B_p_76                      ;[ 6,2]

*   Packet 3 of 3: unpack input bytes 8..F and, while B_i is non-zero,      *
*   issue the loads for a later block (16 more input bytes).                *
*   NOTE(review): B_i is zero only on the final pass, so the loads on       *
*   the two passes before it appear to read 32 bytes beyond the n           *
*   input bytes (the data is never stored) -- confirm this over-read        *
*   is acceptable to callers.                                               *
*   There is no separate epilog: the return branch issued in packet 1       *
*   takes effect after the final pass through this packet.                  *
        UNPKLU4 .S1X    B_p_BA98,   A_p_98                      ;[ 7,2]
||      UNPKHU4 .L1X    B_p_BA98,   A_p_BA                      ;[ 7,2]
||      UNPKLU4 .L2     B_p_FEDC,   B_p_DC                      ;[ 7,2]
||      UNPKHU4 .S2     B_p_FEDC,   B_p_FE                      ;[ 7,2]
||[ B_i]LDDW    .D1T2   *A_i1++[2], B_p_FEDC:B_p_BA98           ;[ 1,4]
||[ B_i]LDDW    .D2T1   *B_i0++[2], A_p_7654:A_p_3210           ;[ 1,4]
* ========================================================================= *
* ========================================================================= *
*   End of file:  pix_expand_h.asm                                          *
* ------------------------------------------------------------------------- *
*             Copyright (c) 2000 Texas Instruments, Incorporated.           *
*                            All Rights Reserved.                           *
* ========================================================================= *
